Figure 1 Simonsohn, Simmons, & Nelson, 2015
Table 1 Simonsohn, Simmons, & Nelson, 2015
Figure 2 Simonsohn, Simmons, & Nelson, 2015
Table2 Simonsohn, Simmons, & Nelson, 2015
Figure 6 Orben & Przybylski, 2019
if (!require(tidyverse)) {
install.packages('tidyverse')
}
if (!require(purrr)) {
install.packages('purrr')
}
if (!require(broom)) {
install.packages('broom')
}
if (!require(cowplot)) {
install.packages('cowplot')
}map from purrr# specify models
models = list(mpg ~ cyl,
mpg ~ cyl + hp,
mpg ~ cyl * hp)
# run models and extract parameter estimates and stats
(model_params = map(models, ~lm(.x, data = mtcars)) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest())# run models and extract model fits
(model_fits = purrr::map(models, ~lm(.x, data = mtcars)) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(model_num = row_number(),
AIC = map_dbl(model, AIC),
BIC = map_dbl(model, BIC)) %>%
select(-model))# join dataframes and select model fits and parameter estimates
model_params %>%
select(model_num, term, estimate) %>%
spread(term, estimate) %>%
left_join(., model_fits) %>%
arrange(AIC)dredge from MuMIn# set na.action for dredge
options(na.action = "na.fail")
# omit NAs
mtcars.na = mtcars %>%
na.omit()
# run full model
full.model = lm(mpg ~ cyl*hp, data = mtcars.na)
# run all possible nested models
(all.models = MuMIn::dredge(full.model, rank = "AIC", extra = "BIC"))dredge# run full model
full.model = lm(mpg ~ cyl*as.factor(vs), data = mtcars.na)
# run all possible nested models
MuMIn::dredge(full.model, rank = "AIC", extra = "BIC") %>%
select(AIC, BIC, everything())map# specify models
models = list(mpg ~ 1,
mpg ~ cyl,
mpg ~ as.factor(vs),
mpg ~ cyl + as.factor(vs),
mpg ~ cyl*as.factor(vs))
# run models and extract parameter estimates and stats
model_params = map(models, ~lm(.x, data = mtcars)) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest()
# run models and extract model fits
model_fits = purrr::map(models, ~lm(.x, data = mtcars)) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(model_num = row_number(),
AIC = map_dbl(model, AIC),
BIC = map_dbl(model, BIC)) %>%
select(-model)
# join dataframes and select model fits and parameter estimates
(models.sca = model_params %>%
select(model_num, term, estimate) %>%
spread(term, estimate) %>%
left_join(., model_fits) %>%
arrange(AIC)%>%
select(AIC, BIC, everything()))# specify mpg ~ cyl as the null model for comparison
null.df = models.sca %>%
filter(model_num == 2)
# tidy for plotting
plot.data = models.sca %>%
arrange(AIC) %>%
mutate(specification = row_number(),
better.fit = ifelse(AIC == null.df$AIC, "equal",
ifelse(AIC < null.df$AIC, "yes","no")))
# get names of variables included in model
variable.names = names(select(plot.data, -model_num, -starts_with("better"), -specification, -AIC, -BIC))
# plot top panel
top = plot.data %>%
ggplot(aes(specification, AIC, color = better.fit)) +
geom_point(shape = "|", size = 4) +
geom_hline(yintercept = null.df$AIC, linetype = "dashed", color = "lightblue") +
scale_color_manual(values = c("lightblue", "black", "red")) +
labs(x = "", y = "AIC\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# plot bottom panel
bottom = plot.data %>%
gather(variable, value, eval(variable.names)) %>%
mutate(value = ifelse(!is.na(value), "|", "")) %>%
ggplot(aes(specification, variable, color = better.fit)) +
geom_text(aes(label = value)) +
scale_color_manual(values = c("lightblue", "black", "red")) +
labs(x = "\nspecification number", y = "variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# join panels
cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B'))# set plotting order for variables based on number of times it's included in better fitting models
order = plot.data %>%
arrange(AIC) %>%
mutate(better.fit.num = ifelse(better.fit == "yes", 1, 0)) %>%
rename("intercept" = `(Intercept)`,
"vs" = `as.factor(vs)1`,
"cyl x vs" = `cyl:as.factor(vs)1`) %>%
gather(variable, value, -model_num, -starts_with("better"), -specification, -AIC, -BIC) %>%
filter(!is.na(value)) %>%
group_by(variable) %>%
mutate(order = sum(better.fit.num)) %>%
select(variable, order) %>%
unique()
# rename variables and plot bottom panel
bottom = plot.data %>%
gather(variable, value, eval(variable.names)) %>%
mutate(value = ifelse(!is.na(value), "|", ""),
variable = ifelse(variable == "(Intercept)", "intercept",
ifelse(variable == "as.factor(vs)1", "vs",
ifelse(variable == "cyl:as.factor(vs)1", "cyl x vs", variable)))) %>%
left_join(., order, by = "variable") %>%
ggplot(aes(specification, reorder(variable, order), color = better.fit)) +
geom_text(aes(label = value)) +
scale_color_manual(values = c("lightblue", "black", "red")) +
labs(x = "\nspecification number", y = "variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# join panels
cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B'))dredge# run full model
full.model = lm(mpg ~ cyl + disp + hp + drat + wt + qsec + as.factor(vs) + am + gear + carb, data = mtcars.na)
# run all possible nested models
models.sca = MuMIn::dredge(full.model, rank = "AIC", extra = "BIC")# tidy for plotting
plot.data = models.sca %>%
arrange(AIC) %>%
mutate(specification = row_number(),
better.fit = ifelse(AIC == null.df$AIC, "equal",
ifelse(AIC < null.df$AIC, "yes","no"))) %>%
gather(variable, value, -starts_with("better"), -specification, -AIC, -BIC, -df, -logLik, -delta, -weight) %>%
mutate(variable = gsub("[()]", "", variable),
variable = gsub("Intercept", "intercept", variable),
variable = gsub("as.factor(vs)", "vs", variable)) %>%
spread(variable, value)
# get names of variables included in model
variable.names = names(select(plot.data, -starts_with("better"), -specification, -AIC, -BIC, -df, -logLik, -delta, -weight))
# plot top panel
top = plot.data %>%
ggplot(aes(specification, AIC, color = better.fit)) +
geom_point(shape = "|", size = 4) +
geom_hline(yintercept = null.df$AIC, linetype = "dashed", color = "lightblue") +
scale_color_manual(values = c("lightblue", "black", "red")) +
labs(x = "", y = "AIC\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# set plotting order for variables based on number of times it's included in better fitting models
order = plot.data %>%
arrange(AIC) %>%
mutate(better.fit.num = ifelse(better.fit == "yes", 1, 0)) %>%
gather(variable, value, eval(variable.names)) %>%
filter(!is.na(value)) %>%
group_by(variable) %>%
mutate(order = sum(better.fit.num)) %>%
select(variable, order) %>%
unique()
# rename variables and plot bottom panel
bottom = plot.data %>%
gather(variable, value, eval(variable.names)) %>%
mutate(value = ifelse(!is.na(value), "|", ""),
variable = ifelse(variable == "(Intercept)", "intercept",
ifelse(variable == "as.factor(vs)1", "vs", variable))) %>%
left_join(., order, by = "variable") %>%
ggplot(aes(specification, reorder(variable, order), color = better.fit)) +
geom_text(aes(label = value)) +
scale_color_manual(values = c("lightblue", "black", "red")) +
labs(x = "\nspecification number", y = "variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# join panels
cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B'))# extract parameter estimate (step-by-step output)
MuMIn::get.models(models.sca, subset = TRUE) %>%
tibble() %>%
rename("model" = ".")MuMIn::get.models(models.sca, subset = TRUE) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest()# extract parameter estimate
(model_params = MuMIn::get.models(models.sca, subset = TRUE) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest() %>%
select(model_num, term, estimate) %>%
spread(term, estimate))# extract p-values for the intercept term
(model_ps = MuMIn::get.models(models.sca, subset = TRUE) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest() %>%
filter(term == "wt") %>%
ungroup() %>%
select(model_num, estimate, std.error, p.value))# merge and tidy for plotting
plot.data = left_join(model_ps, model_params, by = "model_num") %>%
arrange(estimate) %>%
mutate(specification = row_number(),
significant.p = ifelse(p.value < .05, "yes", "no")) %>%
gather(variable, value, -estimate, -specification, -model_num, -std.error, -p.value, -significant.p) %>%
mutate(variable = gsub("[()]", "", variable),
variable = gsub("Intercept", "intercept", variable),
variable = gsub("as.factor(vs)1", "vs", variable)) %>%
spread(variable, value)
# get names of variables included in model
variable.names = names(select(plot.data, -estimate, -specification, -model_num, -std.error, -p.value, -significant.p))
# plot top panel
top = plot.data %>%
ggplot(aes(specification, estimate, color = significant.p)) +
geom_point(shape = "|", size = 4) +
#geom_hline(yintercept = null.df$AIC, linetype = "dashed", color = "lightblue") +
scale_color_manual(values = c("black", "red")) +
labs(x = "", y = "regression coefficient\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# set plotting order for variables based on number of times it's included in better fitting models
order = plot.data %>%
arrange(estimate) %>%
mutate(significant.p.num = ifelse(significant.p == "yes", 1, 0)) %>%
gather(variable, value, eval(variable.names)) %>%
filter(!is.na(value)) %>%
group_by(variable) %>%
mutate(order = sum(significant.p.num)) %>%
select(variable, order) %>%
unique()
# rename variables and plot bottom panel
bottom = plot.data %>%
gather(variable, value, eval(variable.names)) %>%
mutate(value = ifelse(!is.na(value), "|", ""),
variable = ifelse(variable == "(Intercept)", "intercept",
ifelse(variable == "as.factor(vs)1", "vs", variable))) %>%
left_join(., order, by = "variable") %>%
ggplot(aes(specification, reorder(variable, order), color = significant.p)) +
geom_text(aes(label = value)) +
scale_color_manual(values = c("black", "red")) +
labs(x = "\nspecification number", y = "variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# join panels
(wt = cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B')))# extract p-values for the intercept term
model_ps = MuMIn::get.models(models.sca, subset = TRUE) %>%
tibble() %>%
rename("model" = ".") %>%
mutate(tidied = purrr::map(model, broom::tidy),
model_num = row_number()) %>%
select(model_num, tidied) %>%
unnest() %>%
filter(term == "cyl") %>%
ungroup() %>%
select(model_num, estimate, std.error, p.value)# merge and tidy for plotting
plot.data = left_join(model_ps, model_params, by = "model_num") %>%
arrange(estimate) %>%
mutate(specification = row_number(),
significant.p = ifelse(p.value < .05, "yes", "no")) %>%
gather(variable, value, -estimate, -specification, -model_num, -std.error, -p.value, -significant.p) %>%
mutate(variable = gsub("[()]", "", variable),
variable = gsub("Intercept", "intercept", variable),
variable = gsub("as.factor(vs)1", "vs", variable)) %>%
spread(variable, value)
# get names of variables included in model
variable.names = names(select(plot.data, -estimate, -specification, -model_num, -std.error, -p.value, -significant.p))
# plot top panel
top = plot.data %>%
ggplot(aes(specification, estimate, color = significant.p)) +
geom_point(shape = "|", size = 4) +
#geom_hline(yintercept = null.df$AIC, linetype = "dashed", color = "lightblue") +
scale_color_manual(values = c("black", "red")) +
labs(x = "", y = "regression coefficient\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# set plotting order for variables based on number of times it's included in better fitting models
order = plot.data %>%
arrange(estimate) %>%
mutate(significant.p.num = ifelse(significant.p == "yes", 1, 0)) %>%
gather(variable, value, eval(variable.names)) %>%
filter(!is.na(value)) %>%
group_by(variable) %>%
mutate(order = sum(significant.p.num)) %>%
select(variable, order) %>%
unique()
# rename variables and plot bottom panel
bottom = plot.data %>%
gather(variable, value, eval(variable.names)) %>%
mutate(value = ifelse(!is.na(value), "|", ""),
variable = ifelse(variable == "(Intercept)", "intercept",
ifelse(variable == "as.factor(vs)1", "vs", variable))) %>%
left_join(., order, by = "variable") %>%
ggplot(aes(specification, reorder(variable, order), color = significant.p)) +
geom_text(aes(label = value)) +
scale_color_manual(values = c("black", "red")) +
labs(x = "\nspecification number", y = "variables\n") +
theme_minimal(base_size = 11) +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 9),
axis.text = element_text(color = "black"),
axis.line = element_line(colour = "black"),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
# join panels and compare plots
(cyl = cowplot::plot_grid(top, bottom, ncol = 1, align = "v", labels = c('A', 'B')))wt… Inferential statistics using specification curves! … Plot standard errors!